from scrapy.exceptions import DropItem
import re
import jieba
import sys
class FilterWordsPipeline(object):
    """Strip punctuation from an item's title and content and log both to a file.

    Despite its name, this pipeline never drops an item: it reads
    ``item['title']`` and ``item['content']``, removes ASCII and full-width
    punctuation/whitespace from each, appends the two cleaned strings (one per
    line) to ``output_path`` and passes the item on unchanged.
    """

    # File the cleaned text is appended to. Kept as a class attribute so a
    # subclass or an instance can redirect the output without editing the code.
    output_path = r'F:\file\test1.txt'

    # Written as UTF-8 explicitly so the result does not depend on the
    # platform's locale codec, which may be unable to encode scraped text.
    output_encoding = "utf-8"

    # Characters removed from the title. Inside the first class, ``)-+`` is a
    # range that covers ``)``, ``*`` and ``+``; the trailing ``-`` of the
    # second class is a literal hyphen.
    _TITLE_PUNCTUATION = re.compile(
        r"[\s+\.\!\/_,$%^*()-+\"\']+|[+——！？，。？→、~@#￥%……&*（）《》～“”：【】-]+"
    )

    # Characters removed from the content.
    # NOTE(review): ``)-;`` in the first class is a range (U+0029..U+003B), so
    # besides ``;`` it also removes ASCII digits, ``:``, ``-``, ``.`` and
    # ``/``. The title pattern keeps digits, so stripping them here looks
    # unintended; the pattern is kept as it was to avoid changing the output.
    # Confirm the intent before "fixing" it.
    _CONTENT_PUNCTUATION = re.compile(
        r"[\s+\.\!\/_,$%^*()-;+\"\']+|[+——！？，。？→、~@#￥%……&*（）〉《》～“”；：【】-]+"
    )

    def process_item(self, item, spider):
        """Append the cleaned title and content of *item* to ``output_path``.

        Args:
            item: Mapping-like object providing string values under the
                ``'title'`` and ``'content'`` keys. It is not modified.
            spider: The spider that produced the item (unused).

        Returns:
            The same *item*, so that later pipeline stages still receive it.

        Raises:
            KeyError: If ``'title'`` or ``'content'`` is missing.
            TypeError: If either value is not a string.
            OSError: If ``output_path`` cannot be opened for appending.
        """
        # Clean both fields before touching the file so a bad item never
        # leaves a half-written record behind.
        title = self._TITLE_PUNCTUATION.sub("", item['title'])
        content = self._CONTENT_PUNCTUATION.sub("", item['content'])

        # Write straight to the file instead of temporarily replacing
        # sys.stdout: that swap is process-wide and would stay in place if
        # anything raised before it was restored.
        with open(self.output_path, 'a', encoding=self.output_encoding) as out:
            print(title, file=out)
            print(content, file=out)

        return item